In [1]:
%matplotlib inline
import json
import os
import math
from collections import OrderedDict
import numpy as np  # np.mean is used in aggregations below
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
from pathlib import Path
from data import CodesInDbs, Mappings, Databases
from IPython.display import Latex

pd.set_option('display.max_colwidth', 100)

sns.set_style('whitegrid')
sns.set_context("paper", font_scale=2)
#plt.rcParams['figure.figsize'] = (4, 3)
#plt.rc("savefig", dpi=150)

measures_palette = sns.color_palette('Set1', n_colors=2, desat=.5)
measures_palette.reverse()

def graded_recall_palette(n_colors, rev=True):
    palette = sns.color_palette("Blues", n_colors=n_colors, desat=.6)
    if rev:
        palette.reverse()
    return palette

def graded_precision_palette(n_colors, rev=True):
    palette = sns.color_palette("Reds", n_colors=n_colors, desat=.6)
    if rev:
        palette.reverse()
    return palette

def mystyle(palette=None, xrot=0, ha='center', ylim=(0,1), ylabel=None, savefig=None):
    """Context manager applying the shared bar-plot style (despined, legend outside, rotated ticks) and optionally saving the figure."""
    class C:
        def __enter__(self):
            if palette is not None:
                palette.__enter__()
        def __exit__(self, exc_type, value, traceback):
            if palette is not None:
                palette.__exit__(exc_type, value, traceback)
            if exc_type is None:
                ax = plt.gca()
                sns.despine(left=True, ax=ax)
                ax.grid(False, axis='x')
                if ax.legend_: 
                    lgd = ax.legend(loc=2, bbox_to_anchor=(1, 1))
                else:
                    lgd = None
                if ax.get_lines():
                    # hide the first line (the throwaway axhline added in draw_lines)
                    ax.get_lines()[0].set_visible(False)
                ax.set_ylim(*ylim)
                plt.xticks(rotation=xrot, ha=ha)
                if ylabel is not None:
                    ax.set_ylabel(ylabel)
                if savefig:
                    filename = '{}-{}'.format(PROJECT, savefig)
                    plt.savefig(filename, bbox_extra_artists=[lgd] if lgd else [], bbox_inches='tight')
    return C()

def draw_lines(ys, palette=None):
    if palette is None:
        palette = sns.color_palette()
    ax = plt.gca()
    ax.axhline(0, 0, 0)  # throwaway call: the first axhline is otherwise not rendered
    for y, color in zip(ys, palette):
        ax.axhline(y, color=color, zorder=-100)
    return ax

"""
pd.set_option('display.notebook_repr_html', True)
def _repr_latex_(self):
    #return r"\begin{center}%s\end{center}" % 
    return self.to_latex()
pd.DataFrame._repr_latex_ = _repr_latex_  # monkey patch pandas DataFrame
"""

PROJECT = 'safeguard' # os.getenv('COMAP_PROJECT')
print("PROJECT:", PROJECT)


PROJECT: safeguard

In [2]:
with open('../projects/{}/config.yaml'.format(PROJECT)) as f:
    config = yaml.safe_load(f)
    databases = Databases.of_config(config)
    coding_systems = config['coding-systems']
    events = config["events"]
    variations = config['variations']
    event_names = {}
    for event in events:
        with open('../projects/{}/case-definitions/{}.yaml'.format(PROJECT, event)) as g:
            casedef = yaml.safe_load(g)
        event_names[event] = casedef['name']

with open('../{}.mappings.json'.format(PROJECT)) as f:
    mappings = Mappings.of_data(json.load(f))

with open('../codes-in-dbs.json') as f:
    codes_in_dbs = CodesInDbs.of_data(json.load(f))

coding_systems = ["ICD-9", "ICD-10", "ICPC-2", "READ-2"]  # fixed display order, overrides the config value
    
def database_label(database):
    #return database
    #return "{} ({})".format(database, databases.coding_system(database))
    return {
        "ICD10CM": "ICD-10",
        "ICD10/CM": "ICD-10",
        "RCD2": "READ-2",
        "ICPC2EENG": "ICPC-2",
        "ICD9CM": "ICD-9",
    }[databases.coding_system(database)]

def measure_label(measure):
    return {
        "recall": "Sensitivity",
        "precision": "PPV", # "Positive predictive value",
    }[measure]

def event_label(event):
    return event_names[event]

def len_if_notnull(x):
    if x != x:  # NaN is the only value that is not equal to itself
        return 0
    else:
        return len(x)

Load evaluations ev


In [3]:
ev = pd.read_csv('../{}.evaluations.csv'.format(PROJECT))
for key in ['generated', 'reference', 'tp', 'fp', 'fn']:
    ev[key] = ev[key].map(lambda x: x if x != x else json.loads(x))  # keep NaN, otherwise parse the JSON list
ev['variation event database recall precision'.split()].head()


Out[3]:
variation event database recall precision
0 4-RN-RB.expand bc GePaRD 1.000000 0.400000
1 4-RN-RB.expand bc IPCI 1.000000 0.500000
2 4-RN-RB.expand bc Medicare 1.000000 0.387097
3 4-RN-RB.expand bc CPRD 0.582418 0.841270
4 4-RN-RB.expand mi GePaRD 1.000000 0.636364

Mappings


In [4]:
df_m = mappings.describe()
df_m.index = df_m.index.map(database_label)
df_m.columns = df_m.columns.map(event_label)
df_m.index.name = 'Inclusion codes'
df_m['Sum'] = df_m.iloc[:4,:7].sum(axis=1)
df_m['Average'] = df_m.iloc[:4,:7].mean(axis=1).round(2)
#df.ix['Sum'] = df.iloc[:4, :7].sum()
#df.ix['Average'] = df.iloc[:4, :7].mean().round(2)
#df.ix['Sum']['Sum'] = df['Sum'].sum()
#df.ix['Average']['Average'] = df['Average'].mean()
df_m.fillna('-').T[['ICD-9', 'ICD-10', 'ICPC-2', 'READ-2']]


Out[4]:
Inclusion codes ICD-9 ICD-10 ICPC-2 READ-2
Acute Pancreatitis 1 6 1 7
Bladder cancer 12 12 1 91
Hemorrhagic stroke 3 22 1 36
Ischemic stroke 10 11 2 20
Myocardial Infarction 11 7 1 -
Pancreatic Cancer 8 9 1 109
Ventricular Arrhythmia 5 5 1 27
Sum 50 72 8 290
Average 7.14 10.29 1.14 48.33

In [5]:
df_e = mappings.describe(exclusions=True)
df_e.index = df_e.index.map(database_label)
df_e.columns = df_e.columns.map(event_label)
df_e.index.name = 'Exclusion codes'
df_e['Sum'] = df_e.iloc[:4,:7].sum(axis=1)
df_e['Average'] = df_e.iloc[:4,:7].mean(axis=1).round(2)
df_e.fillna('-').T[['ICD-9', 'ICD-10', 'ICPC-2', 'READ-2']]


Out[5]:
Exclusion codes ICD-9 ICD-10 ICPC-2 READ-2
Acute Pancreatitis - - - -
Bladder cancer - - 3 -
Hemorrhagic stroke 2 2 2 -
Ischemic stroke - - 1 -
Myocardial Infarction 10 - 6 -
Pancreatic Cancer - - 1 -
Ventricular Arrhythmia - - 1 -
Sum 12 2 14 -
Average 6 2 2.33 -

In [6]:
def combine_pair(t):
    return '{} ({})'.format(t.inc, t.exc)
                                  
def combine_row(inc, exc):
    return (pd.DataFrame({'inc': inc.fillna('-'), 'exc': exc.fillna('-')})
              .apply(combine_pair, axis=1))

df = df_m.astype('float64').combine(df_e.astype('float64'), combine_row)
df = df.T[['ICD-9', 'ICD-10', 'ICPC-2', 'READ-2']]
df.index.name = 'Events'
df


Out[6]:
Inclusion codes ICD-9 ICD-10 ICPC-2 READ-2
Events
Acute Pancreatitis 1.0 (-) 6.0 (-) 1.0 (-) 7.0 (-)
Bladder cancer 12.0 (-) 12.0 (-) 1.0 (3.0) 91.0 (-)
Hemorrhagic stroke 3.0 (2.0) 22.0 (2.0) 1.0 (2.0) 36.0 (-)
Ischemic stroke 10.0 (-) 11.0 (-) 2.0 (1.0) 20.0 (-)
Myocardial Infarction 11.0 (10.0) 7.0 (-) 1.0 (6.0) - (-)
Pancreatic Cancer 8.0 (-) 9.0 (-) 1.0 (1.0) 109.0 (-)
Ventricular Arrhythmia 5.0 (-) 5.0 (-) 1.0 (1.0) 27.0 (-)
Sum 50.0 (12.0) 72.0 (2.0) 8.0 (14.0) 290.0 (-)
Average 7.14 (6.0) 10.29 (2.0) 1.14 (2.33) 48.33 (-)

Notes

Should exclusion codes from the reference be generated?

No. Exclusion codes are often added specifically for a single database, so they are not represented in the case definition.

Coding systems


In [7]:
pd.DataFrame([
    (database, databases.coding_system(database), database_label(database))
    for database in databases.databases()
], columns=("Database", "Coding system", "Label")).set_index("Database")


Out[7]:
Coding system Label
Database
Medicare ICD9CM ICD-9
IPCI ICPC2EENG ICPC-2
CPRD RCD2 READ-2
GePaRD ICD10/CM ICD-10

Baseline-0

DISO filtering for concepts

Number and percentage of concepts with true-positive codes, overall (All) and restricted to the semantic group disorders (DISO).


In [8]:
types_distr = pd.DataFrame(json.load(open('../{}.types-distrs.json'.format(PROJECT)))).T

df = pd.DataFrame()
df['All'] = types_distr.groupby('group')[['pos', 'neg']].sum().sum()
df['All %'] =  df['All'] / df['All'].sum()
df['DISO'] = types_distr.groupby('group')[['pos', 'neg']].sum().loc['DISO']
df['DISO %'] = df['DISO'] / df['DISO'].sum()
df


Out[8]:
All All % DISO DISO %
pos 14 0.177215 14 0.304348
neg 65 0.822785 32 0.695652

Number of concepts in each mapping


In [9]:
df = (ev[ev.variation == 'baseline0'].
      groupby('event').
      first().
      cuis.
      map(json.loads).
      map(len).
      to_frame('#CUIs'))
df.index = df.index.map(event_label)
df.loc['SUM'] = df['#CUIs'].sum()
df.T


Out[9]:
Acute Pancreatitis Bladder cancer Hemorrhagic stroke Ischemic stroke Myocardial Infarction Pancreatic Cancer Ventricular Arrhythmia SUM
#CUIs 8 15 7 7 9 8 23 77

Numbers of generated and reference codes, and confusion counts, by coding system.
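
For reference, both measures used throughout derive from these confusion counts per event and database:

$$\mathrm{Sensitivity} = \frac{TP}{TP + FN} \qquad \mathrm{PPV} = \frac{TP}{TP + FP}$$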


In [10]:
df = ev[ev.variation == 'baseline0'][['event', 'database', 'generated', 'reference', 'tp', 'fp', 'fn']]
for key in ['generated', 'reference', 'tp', 'fp', 'fn']:
    df[key] = df[key].map(len_if_notnull)
df['database'] = df['database'].map(database_label)
df.groupby('database').sum()


Out[10]:
generated reference tp fp fn
database
ICD-10 32 72 11 21 61
ICD-9 22 50 9 13 41
ICPC-2 7 8 3 4 5
READ-2 49 290 25 24 265

In [11]:
df = pd.DataFrame([
    ev[ev.variation == 'baseline0'].groupby('database').recall.mean(),
    ev[ev.variation == 'baseline0'].groupby('database').precision.mean(),
])
df.index = df.index.map(measure_label)
df.columns = df.columns.map(database_label)
df = df[coding_systems]
df['Average'] = df.mean(axis=1)

with mystyle(measures_palette, savefig='baseline0-performance-by-db.pdf'):
    with sns.plotting_context(font_scale=1):
        ax = draw_lines(df['Average'])
        df.iloc[:,:-1].T.plot(kind='bar', title='Baseline_0', ax=ax)

baseline0_performance = df

df.round(3)


Out[11]:
ICD-9 ICD-10 ICPC-2 READ-2 Average
Sensitivity 0.300 0.195 0.357 0.131 0.246
PPV 0.322 0.380 0.500 0.373 0.394

Baseline


In [12]:
df = ev[ev.variation == 'baseline'][['event', 'database', 'generated', 'reference', 'tp', 'fp', 'fn']]
for key in 'generated reference tp fp fn'.split():
    df[key] = df[key].map(len_if_notnull)
df['database'] = df['database'].map(database_label)
df.groupby('database').sum()


Out[12]:
generated reference tp fp fn
database
ICD-10 32 72 11 21 61
ICD-9 21 50 9 12 41
ICPC-2 6 8 3 3 5
READ-2 44 290 25 19 265

Number of concepts in the mapping


In [13]:
df = (ev[ev.variation == 'baseline'].
      groupby('event').
      first().
      cuis.
      map(json.loads).
      map(len).
      to_frame('#CUIs'))
df.index = df.index.map(event_label)
df.loc['SUM'] = df['#CUIs'].sum()
df.T


Out[13]:
Acute Pancreatitis Bladder cancer Hemorrhagic stroke Ischemic stroke Myocardial Infarction Pancreatic Cancer Ventricular Arrhythmia SUM
#CUIs 5 9 4 4 3 5 16 46

In [14]:
df = pd.DataFrame([
    ev[ev.variation == 'baseline'].groupby('database').recall.mean(),
    ev[ev.variation == 'baseline'].groupby('database').precision.mean(),
])
df.index = df.index.map(measure_label)
df.columns = df.columns.map(database_label)
df = df[coding_systems]
df['Average'] = df.mean(axis=1)

with mystyle(measures_palette, savefig='baseline-performance-by-db.pdf'):
    with sns.plotting_context(font_scale=1):
        ax = draw_lines(df['Average'])
        df.iloc[:,:-1].T.plot(kind='bar', title='Baseline', ax=ax)

baseline_performance = df

df.round(3)


Out[14]:
ICD-9 ICD-10 ICPC-2 READ-2 Average
Sensitivity 0.300 0.195 0.357 0.131 0.246
PPV 0.387 0.380 0.500 0.411 0.420

In [15]:
df = pd.DataFrame([
    ev[ev.variation == 'baseline'].groupby('event').recall.mean(),
    ev[ev.variation == 'baseline'].groupby('event').precision.mean(),
])
df.index = df.index.map(measure_label)
df.columns = df.columns.map(event_label)
df['Average'] = df.mean(axis=1)

with mystyle(measures_palette, xrot=45, ha='right', savefig='baseline-performance-by-event.pdf'):
    ax = draw_lines(df['Average'])
    df.iloc[:,:-1].T.plot(kind='bar', title='Baseline', ax=ax)
    
df.round(3)


Out[15]:
Acute Pancreatitis Bladder cancer Hemorrhagic stroke Ischemic stroke Myocardial Infarction Pancreatic Cancer Ventricular Arrhythmia Average
Sensitivity 0.399 0.402 0.0 0.170 0.048 0.384 0.296 0.243
PPV 0.211 0.522 0.0 0.667 0.500 0.625 0.335 0.409

Max-recall


In [16]:
df = ev[ev.variation == 'max-recall'][['event', 'database', 'generated', 'reference', 'tp', 'fp', 'fn']]
for key in ['generated', 'reference', 'tp', 'fp', 'fn']:
    df[key] = df[key].map(len_if_notnull)
df['database'] = df['database'].map(database_label)
df = df.groupby('database').sum()
df.loc['Overall'] = df.sum()
df['fn/reference'] = df['fn'] / df['reference']
#df['tp/generated'] = 1 - (df.tp / df.generated).round(3)
df.round(3)


Out[16]:
generated reference tp fp fn fn/reference
database
ICD-10 148 72 72 76 0 0.000
ICD-9 107 50 50 57 0 0.000
ICPC-2 10 8 8 2 0 0.000
READ-2 310 290 279 31 11 0.038
Overall 575 420 409 166 11 0.026

In [17]:
df = pd.DataFrame([
    ev[ev.variation == 'max-recall'].groupby('database').recall.mean(),
    ev[ev.variation == 'max-recall'].groupby('database').precision.mean(),
])
df.index = df.index.map(measure_label)
df.columns = df.columns.map(database_label)
df = df[coding_systems]
df['Average'] = df.mean(axis=1)

with mystyle(measures_palette, ylim=(0,1), savefig='max-recall-performance-by-db.pdf'):
    ax = draw_lines(df['Average'])
    df.iloc[:,:-1].T.plot(kind='bar', title='Maximum recall', ax=ax)
    
maxrecall_performance = df
    
df.round(3)


Out[17]:
ICD-9 ICD-10 ICPC-2 READ-2 Average
Sensitivity 1.000 1.000 1.000 0.966 0.991
PPV 0.612 0.608 0.857 0.855 0.733

In [18]:
df = pd.DataFrame([
    ev[ev.variation == 'max-recall'].groupby('database').recall.mean(),
])
df.index = df.index.map(measure_label)
df.columns = df.columns.map(database_label)
df = df[coding_systems]
df['Average'] = df.mean(axis=1)

with mystyle(measures_palette, ylim=(.9, 1), savefig='max-recall-recall-by-db.pdf'):
    ax = draw_lines(df['Average'])
    df.iloc[:,:-1].T.plot(kind='bar', legend=False, title='Maximum recall', ax=ax)
    plt.ylabel(measure_label('recall'))
    
df.round(3)


Out[18]:
ICD-9 ICD-10 ICPC-2 READ-2 Average
Sensitivity 1.0 1.0 1.0 0.966 0.991

Reasons for imperfect sensitivity


In [19]:
with open('../{}.code-stats.csv'.format(PROJECT)) as f:
    code_stats = pd.read_csv(f)
    
stats = pd.DataFrame()
stats['Mapping'] = (code_stats[code_stats.InMapping]
                    .groupby('Database')
                    .Code.count())
stats['Not maximum-recall'] = (code_stats[code_stats.InMapping & ~code_stats.InDnf]
                               .groupby('Database')
                               .Code.count())
stats = stats.fillna(0)
stats['% of missing'] = (stats['Not maximum-recall'] / stats['Not maximum-recall'].sum()).map("{:.2%}".format)
stats['% of mapping'] = (stats['Not maximum-recall'] / stats['Mapping']).map("{:.2%}".format)
stats.index = stats.index.map(database_label)
stats


Out[19]:
Mapping Not maximum-recall % of missing % of mapping
READ-2 229 7.0 100.00% 3.06%
ICD-10 72 0.0 0.00% 0.00%
ICPC-2 7 0.0 0.00% 0.00%
ICD-9 50 0.0 0.00% 0.00%

In [20]:
max_recall_fn = ev[(ev.variation == 'max-recall') & (ev.recall < 1)][["database", "fn"]]
max_recall_fn.database = max_recall_fn.database.map(database_label)
max_recall_fn = max_recall_fn.groupby('database').fn.sum().to_frame('fn')
max_recall_fn['fn'] = max_recall_fn['fn'].map(lambda x: set() if x != x else set(x)).map(', '.join)
max_recall_fn.index.name = 'Database'
max_recall_fn.columns = ['False negatives of maximum recall']
max_recall_fn


Out[20]:
False negatives of maximum recall
Database
READ-2 BBa.., BBaz., G64z4, 7L1H7, 1O0.., 7L1H6, BBd9.

CPRD: READ-2 codes from the reference are mapped to READ CTV3 codes that are not in UMLS; for example, 7L1H6 (READ-2) -> XaM3E, XaPuP, 7L1H6, 7L1h6.
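
These residual codes can be read straight off the code statistics loaded in In [19]; a minimal sketch, assuming only the code_stats columns used above:

In [ ]:
# Sketch: reference codes that have no UMLS-indexed synonym (InDnf == False)
# and that maximum recall therefore cannot generate.
(code_stats[code_stats.InMapping & ~code_stats.InDnf]
 .groupby('Database').Code
 .aggregate(lambda codes: ', '.join(sorted(set(codes)))))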


In [21]:
averages_compare = pd.DataFrame([
    ev[ev.variation == 'max-recall'].groupby('event').recall.mean(),
    ev[ev.variation == 'max-recall'].groupby('event').precision.mean(),
])
averages_compare.index = averages_compare.index.map(measure_label)
averages_compare.columns = averages_compare.columns.map(event_names.get)
averages_compare['Average'] = averages_compare.mean(axis=1)

with mystyle(measures_palette, xrot=45, ha='right', savefig='max-recall-by-event.pdf'):
    ax = draw_lines(averages_compare['Average'])
    averages_compare.iloc[:,:-1].T.plot(kind='bar', title="Maximum recall", ax=ax)
averages_compare.round(3)


Out[21]:
Acute Pancreatitis Bladder cancer Hemorrhagic stroke Ischemic stroke Myocardial Infarction Pancreatic Cancer Ventricular Arrhythmia Average
Sensitivity 1.000 0.989 1.00 0.988 1.000 0.991 0.981 0.993
PPV 0.688 0.514 0.91 0.827 0.972 0.464 0.785 0.737

Compare relations for expansion

RN and RB are the UMLS "narrower" and "broader" relations; PAR and CHD are the "parent" and "child" relations.


In [22]:
compare_variations = OrderedDict([
    ('3-RN-RB.expand', 'Expand 3 RN, RB'),
    ('3-CHD-PAR.expand', 'Expand 3 PAR, CHD'),
    ('3-RN-CHD-RB-PAR.expand', 'Expand 3 RN, CHD, RB, PAR'),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == variation].groupby('database').recall.mean()
    for variation in compare_variations
], index=compare_variations)

averages_compare.columns = averages_compare.columns.map(database_label)
averages_compare.index = compare_variations.values()
averages_compare = averages_compare[coding_systems]
averages_compare['Average'] = averages_compare.mean(axis=1)

with mystyle(graded_recall_palette(len(compare_variations), rev=0), savefig='relations-recall-by-db.pdf'):
    ax = draw_lines(averages_compare['Average'])
    averages_compare.iloc[:, :-1].T.plot(kind='bar', title="Relations in expansion step 3", ax=ax)
    plt.ylabel(measure_label('recall'))
print(averages_compare)


                              ICD-9    ICD-10  ICPC-2    READ-2   Average
Expand 3 RN, RB            0.901299  0.912698     1.0  0.703094  0.879273
Expand 3 PAR, CHD          0.900000  0.993506     1.0  0.821903  0.928852
Expand 3 RN, CHD, RB, PAR  0.928571  1.000000     1.0  0.882328  0.952725

In [23]:
compare_variations = OrderedDict([
    ('3-RN-RB.expand', 'Expand 3 RN, RB'),
    ('3-CHD-PAR.expand', 'Expand 3 PAR, CHD'),
    ('3-RN-CHD-RB-PAR.expand', 'Expand 3 RN, CHD, RB, PAR'),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == variation].groupby('database').precision.mean()
    for variation in compare_variations
], index=compare_variations)

averages_compare.columns = averages_compare.columns.map(database_label)
averages_compare.index = compare_variations.values()
averages_compare = averages_compare[coding_systems]
averages_compare['Average'] = averages_compare.mean(axis=1)

with mystyle(graded_precision_palette(len(compare_variations), rev=0), savefig='relations-precision-by-db.pdf'):
    ax = draw_lines(averages_compare['Average'])
    averages_compare.iloc[:, :-1].T.plot(kind='bar', title="Relations in expansion step 3", ax=ax)
    plt.ylabel(measure_label('precision'))
print(averages_compare)


                              ICD-9    ICD-10    ICPC-2    READ-2   Average
Expand 3 RN, RB            0.485195  0.520532  0.785714  0.745774  0.634304
Expand 3 PAR, CHD          0.446468  0.509343  0.761905  0.728762  0.611620
Expand 3 RN, CHD, RB, PAR  0.462319  0.497555  0.761905  0.742428  0.616052

In [24]:
compare_variations = OrderedDict([
    ('4-RN-RB.expand', 'Expand 4 RN, RB'),
    ('4-CHD-PAR.expand', 'Expand 4 PAR, CHD'),
    ('4-RN-CHD-RB-PAR.expand', 'Expand 4 RN, CHD, RB, PAR'),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == variation].groupby('database').recall.mean()
    for variation in compare_variations
], index=compare_variations)

averages_compare.columns = averages_compare.columns.map(database_label)
averages_compare.index = compare_variations.values()
averages_compare = averages_compare[coding_systems]
averages_compare['Average'] = averages_compare.mean(axis=1)

with mystyle(graded_recall_palette(len(compare_variations), rev=0), savefig='relations-recall-by-db.pdf'):
    ax = draw_lines(averages_compare['Average'])
    averages_compare.iloc[:, :-1].T.plot(kind='bar', title="Relations in expansion step 4", ax=ax)
    plt.ylabel(measure_label('recall'))
print(averages_compare)


                              ICD-9    ICD-10  ICPC-2    READ-2   Average
Expand 4 RN, RB            0.901299  0.912698     1.0  0.703094  0.879273
Expand 4 PAR, CHD          0.900000  0.993506     1.0  0.830153  0.930915
Expand 4 RN, CHD, RB, PAR  0.928571  1.000000     1.0  0.883857  0.953107

In [25]:
compare_variations = OrderedDict([
    ('baseline', 'Baseline'),
    ('1-RN-RB.expand', 'RN, RB'),
    ('1-RN-CHD.expand', 'RN, CHD'),
    ('1-RB-PAR.expand', 'RB, PAR'),
    ('1-PAR-CHD.expand', 'PAR, CHD'),
    ('1-RN-CHD-RB-PAR.expand', 'RN, CHD, RB, PAR'),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == variation].groupby('database').recall.mean()
    for variation in compare_variations
], index=compare_variations)

averages_compare.columns = averages_compare.columns.map(database_label)
averages_compare.index = compare_variations.values()
averages_compare = averages_compare[coding_systems]
averages_compare['Average'] = averages_compare.mean(axis=1)

with mystyle(graded_recall_palette(len(compare_variations), rev=0), savefig='relations-recall-by-db.pdf'):
    ax = draw_lines(averages_compare['Average'])
    averages_compare.iloc[:, :-1].T.plot(kind='bar', title="Relations for expansion", ax=ax)
    plt.ylabel(measure_label('recall'))



In [26]:
compare_variations = OrderedDict([
    ('baseline', 'Baseline'),
    ('1-RN-RB.expand', 'RN, RB'),
    ('1-RN-CHD.expand', 'RN, CHD'),
    ('1-RB-PAR.expand', 'RB, PAR'),
    ('1-PAR-CHD.expand', 'PAR, CHD'),
    ('1-RN-CHD-RB-PAR.expand', 'RN, CHD, RB, PAR'),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == variation].groupby('event').recall.mean()
    for variation in compare_variations
], index=compare_variations)
averages_compare.columns = averages_compare.columns.map(event_names.get)
averages_compare.index = compare_variations.values()
averages_compare['Average'] = averages_compare.mean(axis=1)

with mystyle(graded_recall_palette(len(compare_variations), rev=0), xrot=45, ha='right', savefig='relations-recall-by-event.pdf'):
    ax = draw_lines(averages_compare['Average'])
    averages_compare.iloc[:,:-1].T.plot(kind='bar', title="Relations for expansion", ax=ax)
    plt.ylabel(measure_label('recall'))


Increasing sensitivity with more expansion steps
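
The expansion itself happens in the pipeline, not in this notebook. Conceptually, each step adds every concept reachable from the current set via the selected UMLS relations; a sketch, assuming a hypothetical lookup related(cui, rels) that is not defined here:

In [ ]:
# Conceptual sketch only -- not the pipeline implementation.
# related(cui, rels) stands in for a UMLS relation lookup (MRREL).
def expand(cuis, rels, steps):
    current = set(cuis)
    for _ in range(steps):
        current |= {c for cui in current for c in related(cui, rels)}
    return current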


In [27]:
variations_names = OrderedDict([
    ('baseline', 'baseline'),
    ('1-RN-CHD-RB-PAR.expand', 'expand$_1$'),
    ('2-RN-CHD-RB-PAR.expand', 'expand$_2$'),
    ('3-RN-CHD-RB-PAR.expand', 'expand$_3$'),
    ('4-RN-CHD-RB-PAR.expand', 'expand$_4$'),
])
df = pd.DataFrame({
    name: ev[ev.variation == variation].groupby('database').recall.mean()
    for variation, name in variations_names.items()
}).T
df.columns = df.columns.map(database_label)
df = df[coding_systems]
df['Average'] = df.mean(axis=1)

with mystyle(graded_recall_palette(len(variations_names), rev=0), savefig='steps-recall-by-db.pdf'):
    ax = draw_lines(df['Average'])
    df.iloc[:-1,:-1].T.plot(kind='bar', ax=ax)
    plt.ylabel(measure_label('recall'))
    
df.round(3)


Out[27]:
ICD-9 ICD-10 ICPC-2 READ-2 Average
baseline 0.300 0.195 0.357 0.131 0.246
expand$_1$ 0.858 0.848 1.000 0.568 0.818
expand$_2$ 0.914 1.000 1.000 0.846 0.940
expand$_3$ 0.929 1.000 1.000 0.882 0.953
expand$_4$ 0.929 1.000 1.000 0.884 0.953

Reasons for low performance in IPCI when including exclusion codes

Exclusion codes are no longer part of the evaluation; see the note above.

The IPCI mapping contains very broad codes that are refined with additional terms, for example:

  • K24 (Fear of heart attack)
  • K90 (stroke)
  • K93 (Pulmonary embolism)
  • D70 (Dementia) OR "dementia" AND "infarct"
  • U14 (Kidney symptom/complaint ) OR "nier" AND "infarct"

In [28]:
expands_performance = OrderedDict()
for i in [1,2,3,4]:
    v = '{}-RN-CHD-RB-PAR.expand'.format(i)
    df = pd.DataFrame([
        ev[ev.variation == v].groupby('database').recall.mean(),
        ev[ev.variation == v].groupby('database').precision.mean(),
    ])
    df.index = df.index.map(measure_label)
    df.columns = df.columns.map(database_label)
    df = df[coding_systems]
    df['Average'] = df.mean(axis=1)

    #with mystyle(measures_palette, ylim=(0,1), savefig='max-recall-performance-by-db.pdf'):
    #    ax = draw_lines(df['Average'])
    #    df.iloc[:,:-1].T.plot(kind='bar', title='Maximum recall', ax=ax)

    expands_performance['expand_{}'.format(i)] = df

In [29]:
num_concepts = pd.Series(OrderedDict([
    (var_name, ev[(ev.variation == var) & (ev.cuis.notnull())]
                 .groupby('event').first()
                 .cuis.map(json.loads).map(len)
                 .sum())
    for var_name, var in [('baseline0', 'baseline0'), ('baseline', 'baseline')] + \
        [('expand_{}'.format(i), '{}-RN-CHD-RB-PAR.expand'.format(i)) for i in range(1,5)] + \
        [('max-sensitivity', 'max-recall')]
])).to_frame('Concepts')
num_concepts


Out[29]:
Concepts
baseline0 77
baseline 46
expand_1 183
expand_2 297
expand_3 335
expand_4 337
max-sensitivity 285

In [30]:
performances = OrderedDict()
performances['baseline_0'] = baseline0_performance
performances['baseline'] = baseline_performance
for v in expands_performance:
    performances[v] = expands_performance[v]
performances['max_sensitivity'] = maxrecall_performance
performances_df = pd.concat(performances).round(3)
performances_df


Out[30]:
ICD-9 ICD-10 ICPC-2 READ-2 Average
baseline Sensitivity 0.300 0.195 0.357 0.131 0.246
PPV 0.387 0.380 0.500 0.411 0.420
baseline_0 Sensitivity 0.300 0.195 0.357 0.131 0.246
PPV 0.322 0.380 0.500 0.373 0.394
expand_1 Sensitivity 0.858 0.848 1.000 0.568 0.818
PPV 0.483 0.558 0.762 0.729 0.633
expand_2 Sensitivity 0.914 1.000 1.000 0.846 0.940
PPV 0.463 0.509 0.762 0.749 0.621
expand_3 Sensitivity 0.929 1.000 1.000 0.882 0.953
PPV 0.462 0.498 0.762 0.742 0.616
expand_4 Sensitivity 0.929 1.000 1.000 0.884 0.953
PPV 0.462 0.497 0.762 0.743 0.616
max_sensitivity Sensitivity 1.000 1.000 1.000 0.966 0.991
PPV 0.612 0.608 0.857 0.855 0.733

In [31]:
s = (performances_df
     .set_index(performances_df.index.rename(['Variation', 'Measurement']))
     .stack()
     .reset_index()
     .rename(columns={'level_2': 'Terminology', 0: 'Value'})
    )
s.head()


Out[31]:
Variation Measurement Terminology Value
0 baseline Sensitivity ICD-9 0.300
1 baseline Sensitivity ICD-10 0.195
2 baseline Sensitivity ICPC-2 0.357
3 baseline Sensitivity READ-2 0.131
4 baseline Sensitivity Average 0.246

In [170]:
ev1 = (ev[['cuis', 'variation', 'event', 'database', 'recall', 'precision', 'tp', 'fp', 'fn']]
       [ev.variation.isin(['baseline']+['{}-RN-CHD-RB-PAR.expand'.format(n) for n in [1,2,3,4]])]
       .replace({'variation': {'baseline': '0-baseline'}})
       .replace({'variation': {'{}-RN-CHD-RB-PAR.expand'.format(n): "{}-expansion".format(n) for n in [1,2,3,4]}})
       .sort_values(by=['variation', 'database', 'event'])
       .copy())
for f in ['cuis', 'generated', 'reference', 'tp', 'fp', 'fn']:
    ev1[f] = ev[f].fillna('').map(len)  # NaN -> '' -> 0; cuis is an unparsed JSON string, so this is its length
ev1.head()


Out[170]:
cuis variation event database recall precision tp fp fn generated reference
387 60 0-baseline ap CPRD 0.428571 0.300000 3 7 4 10 7
367 108 0-baseline bc CPRD 0.109890 0.714286 10 4 81 14 91
391 48 0-baseline hs CPRD 0.000000 0.000000 0 1 36 1 36
375 48 0-baseline is CPRD 0.000000 0.000000 0 1 20 1 20
371 0 0-baseline mi CPRD NaN NaN 0 0 0 0 0

In [164]:
ev2 = ev1.groupby(['variation', 'database']).aggregate(OrderedDict([
            ('recall', np.mean),
            ('precision', np.mean),
            ('cuis', sum),
            ('generated', sum), ('reference', sum), ('tp', sum), ('fp', sum), ('fn', sum)
]))

ev2.head()


Out[164]:
recall precision cuis generated reference tp fp fn
variation database
0-baseline CPRD 0.131311 0.411472 516 44 290 25 19 265
GePaRD 0.194795 0.380102 552 32 72 11 21 61
IPCI 0.357143 0.500000 552 6 8 3 3 5
Medicare 0.300000 0.386667 552 21 50 9 12 41
1-expansion CPRD 0.567532 0.728883 2028 176 290 139 37 151

Verification of macro-average performance measures
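
The Average columns reported above are macro-averages: measures are first averaged over events within each database, and the database means are then averaged,

$$\mathrm{Sens}_{\mathrm{macro}} = \frac{1}{|D|} \sum_{d \in D} \frac{1}{|E|} \sum_{e \in E} \mathrm{Sens}_{d,e}$$

The cell below recomputes both measures from the raw counts and reproduces these averages.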


In [165]:
(ev1[['variation', 'database', 'event', 'generated' ,'reference', 'tp']]
 .assign(precision1=lambda df: df.tp / df.generated)
 .assign(recall1=lambda df: df.tp / df.reference)
 .groupby(['variation', 'database'])
 .aggregate(OrderedDict([('recall1', np.mean), ('precision1', np.mean)]))
 .reset_index()
 .groupby('variation')
 .aggregate(OrderedDict([('recall1', np.mean), ('precision1', np.mean)]))
 .round(2))


Out[165]:
recall1 precision1
variation
0-baseline 0.25 0.42
1-expansion 0.82 0.63
2-expansion 0.94 0.62
3-expansion 0.95 0.62
4-expansion 0.95 0.62

Macro-average performance measures over event-database pairs


In [166]:
(ev1
 .groupby('variation')
 .aggregate(OrderedDict([('recall', np.mean), ('precision', np.mean)]))
 .round(2))


Out[166]:
recall precision
variation
0-baseline 0.25 0.41
1-expansion 0.83 0.63
2-expansion 0.94 0.62
3-expansion 0.96 0.61
4-expansion 0.96 0.61

Micro-average performance measures
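
Unlike the macro-averages above, micro-averages pool the confusion counts over all events and databases before dividing:

$$\mathrm{Sens}_{\mathrm{micro}} = \frac{\sum TP}{\sum TP + \sum FN} \qquad \mathrm{PPV}_{\mathrm{micro}} = \frac{\sum TP}{\sum TP + \sum FP}$$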


In [230]:
ev3a = (ev1
       .groupby(['variation', 'database'])
       .aggregate(dict({key: lambda s: s.fillna(0).sum() for key in 'generated reference tp fp fn'.split()}, cuis=np.mean, **{'precision': np.mean, 'recall': np.mean}))
       .reset_index())

ev3b = (ev3a
  .groupby('variation')
  .aggregate(dict({key: lambda s: s.fillna(0).sum() for key in 'generated reference tp fp fn'.split()}, cuis=np.mean))
  ['cuis generated reference tp fp fn'.split()]
  .assign(recall=lambda df: df.tp / df.reference,
          precision=lambda df: df.tp / df.generated)
  .assign(database='ZZZ')  # sentinel label sorting after the database names; these rows are the pooled totals
  .reset_index())

ev3 = (pd.concat([ev3a, ev3b])
 .sort_values(['variation', 'database'])
 ['variation database cuis generated reference tp fp fn recall precision'.split()]
 .set_index(['variation', 'database']))
       
ev3


Out[230]:
cuis generated reference tp fp fn recall precision
variation database
0-baseline CPRD 73.714286 44 290 25 19 265 0.131311 0.411472
GePaRD 78.857143 32 72 11 21 61 0.194795 0.380102
IPCI 78.857143 6 8 3 3 5 0.357143 0.500000
Medicare 78.857143 21 50 9 12 41 0.300000 0.386667
ZZZ 77.571429 103 420 48 55 372 0.114286 0.466019
1-expansion CPRD 289.714286 176 290 139 37 151 0.567532 0.728883
GePaRD 313.714286 113 72 58 55 14 0.847866 0.557956
IPCI 313.714286 12 8 8 4 0 1.000000 0.761905
Medicare 313.714286 83 50 40 43 10 0.858442 0.483336
ZZZ 307.714286 384 420 245 139 175 0.583333 0.638021
2-expansion CPRD 469.714286 291 290 239 52 51 0.845907 0.749152
GePaRD 509.142857 154 72 72 82 0 1.000000 0.508975
IPCI 509.142857 12 8 8 4 0 1.000000 0.761905
Medicare 509.142857 110 50 44 66 6 0.914286 0.463144
ZZZ 499.285714 567 420 363 204 57 0.864286 0.640212
3-expansion CPRD 533.142857 320 290 260 60 30 0.882328 0.742428
GePaRD 574.285714 168 72 72 96 0 1.000000 0.497555
IPCI 574.285714 12 8 8 4 0 1.000000 0.761905
Medicare 574.285714 119 50 45 74 5 0.928571 0.462319
ZZZ 564.000000 619 420 385 234 35 0.916667 0.621971
4-expansion CPRD 536.571429 321 290 261 60 29 0.883857 0.742634
GePaRD 577.714286 169 72 72 97 0 1.000000 0.496985
IPCI 577.714286 12 8 8 4 0 1.000000 0.761905
Medicare 577.714286 119 50 45 74 5 0.928571 0.462319
ZZZ 567.428571 621 420 386 235 34 0.919048 0.621578

In [167]:
(ev1
  .groupby('variation')
  .aggregate({key: lambda s: s.fillna(0).sum() for key in 'generated reference tp fp fn'.split()})
  .assign(recall=lambda df: df.tp / df.reference)
  .assign(precision=lambda df: df.tp / df.generated)
  ['recall precision generated reference tp fp fn'.split()]
  .round(2))


Out[167]:
recall precision generated reference tp fp fn
variation
0-baseline 0.11 0.47 103 420 48 55 372
1-expansion 0.58 0.64 384 420 245 139 175
2-expansion 0.86 0.64 567 420 363 204 57
3-expansion 0.92 0.62 619 420 385 234 35
4-expansion 0.92 0.62 621 420 386 235 34

In [32]:
# Drop the [s1.Terminology == 'Average'] filter below to plot all terminologies
variation_names = {
    'baseline': 'Baseline',
    'baseline_0': None,
    'expand_1': '1 expansion step',
    'expand_2': '2 expansion steps',
    'expand_3': '3 expansion steps',
    'expand_4': None,
    'max_sensitivity': '(Maximum sensitivity)'
}
s1 = s.copy()
s1['Code sets'] = s1.Variation.map(variation_names)
s1 = s1[s1['Code sets'].notnull()]  # drop the variations mapped to None above
g = (sns.factorplot(kind='bar', data=s1[s1.Terminology == 'Average'],
                x='Measurement', y='Value', col='Terminology', hue='Code sets',
                saturation=1, legend=True, legend_out=True, size=4, aspect=2,
                #palette=sns.color_palette("Set2", 7),
                palette=sns.color_palette("Set1", n_colors=5, desat=.5),
                hue_order=['Baseline', '1 expansion step', '2 expansion steps', '3 expansion steps', '(Maximum sensitivity)'])
  .set_titles('') #Performance (average over events and vocabularies)")
  .set_xlabels('')
  .set_ylabels('')
  .set(ylim=(0, 1))
  .despine(left=True))
for p in g.axes[0][0].patches:
    height = p.get_height()
    g.ax.text(p.get_x()+1/12, height-0.025, '%.2f' % height,
              fontsize=10, horizontalalignment='center', verticalalignment='top', color='white')
g.savefig('performance.pdf')



In [33]:
compare_variations = OrderedDict([
    ('baseline', 'Baseline'),
    ('1-RN-CHD-RB-PAR.expand', 'Expansion 1 step'),
    ('2-RN-CHD-RB-PAR.expand', 'Expansion 2 steps'),
    ('3-RN-CHD-RB-PAR.expand', 'Expansion 3 steps'),
    ('4-RN-CHD-RB-PAR.expand', 'Expansion 4 steps'),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == variation].groupby('database').precision.mean()
    for variation in compare_variations
], index=compare_variations)
averages_compare.columns = averages_compare.columns.map(database_label)
averages_compare.index = compare_variations.values()
averages_compare = averages_compare[coding_systems]
averages_compare['Average'] = averages_compare.mean(axis=1)

with mystyle(graded_precision_palette(len(compare_variations), rev=0), savefig='steps-precision-by-db.pdf'):
    ax = draw_lines(averages_compare['Average'])
    averages_compare.T.plot(kind='bar', title="Expansion steps", ax=ax)
    plt.ylabel(measure_label('precision'))
averages_compare.round(3)


Out[33]:
ICD-9 ICD-10 ICPC-2 READ-2 Average
Baseline 0.387 0.380 0.500 0.411 0.420
Expansion 1 step 0.483 0.558 0.762 0.729 0.633
Expansion 2 steps 0.463 0.509 0.762 0.749 0.621
Expansion 3 steps 0.462 0.498 0.762 0.742 0.616
Expansion 4 steps 0.462 0.497 0.762 0.743 0.616

In [34]:
compare_variations = OrderedDict([
    ('baseline', 'Baseline'),
    ('1-RN-CHD-RB-PAR.expand', '1 step'),
    ('2-RN-CHD-RB-PAR.expand', '2 steps'),
    ('3-RN-CHD-RB-PAR.expand', '3 steps'),
#    ('4-RN-CHD-RB-PAR.expand', '4 steps'),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == variation].groupby('event').recall.mean()
    for variation in compare_variations
], index=compare_variations)
averages_compare.columns = averages_compare.columns.map(event_names.get)
averages_compare.index = compare_variations.values()
averages_compare['Average'] = averages_compare.mean(axis=1)

with mystyle(graded_recall_palette(len(compare_variations), rev=0), xrot=45, ha='right', savefig='steps-recall-by-event.pdf'):
    ax = draw_lines(averages_compare['Average'])
    averages_compare.T.plot(kind='bar', title="Expansion steps", ax=ax)
    plt.ylabel(measure_label('recall'))



In [35]:
compare_variations = OrderedDict([
    ('baseline', 'Baseline'),
    ('1-RN-CHD-RB-PAR.expand', '1 step'),
    ('2-RN-CHD-RB-PAR.expand', '2 steps'),
    ('3-RN-CHD-RB-PAR.expand', '3 steps'),
#    ('4-RN-CHD-RB-PAR.expand', '4 steps'),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == variation].groupby('event').precision.mean()
    for variation in compare_variations
], index=compare_variations)
averages_compare.columns = averages_compare.columns.map(event_names.get)
averages_compare.index = compare_variations.values()
averages_compare['Average'] = averages_compare.mean(axis=1)

with mystyle(graded_precision_palette(len(compare_variations), rev=0), xrot=45, ha='right', savefig='steps-precision-by-event.pdf'):
    ax = draw_lines(averages_compare['Average'])
    averages_compare.T.plot(kind='bar', title="Expansion steps", ax=ax)
    plt.ylabel(measure_label('precision'))



In [36]:
measures = ['recall', 'precision']
averages_compare = pd.DataFrame([
    ev[ev.variation == '3-RN-CHD-RB-PAR.expand'].groupby('database')[measure].mean()
    for measure in measures
], index=map(measure_label, measures))
averages_compare.columns = averages_compare.columns.map(database_label)
averages_compare = averages_compare[coding_systems]
averages_compare['Average'] = averages_compare.mean(axis=1)

name = 'expansion3-performance-by-db'
with mystyle(measures_palette, savefig=name+'.pdf'):
    ax = draw_lines(averages_compare['Average'])
    averages_compare.iloc[:,:-1].T.plot(kind='bar', title="Performance of 3-step expansion", ax=ax)
averages_compare.to_csv(name+'.csv')


FN error-analysis
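
The error-analysis file assigns a manually determined category to each FN/FP code, nested by error type, database, and event. A minimal sketch of the layout that get_category expects (entries are illustrative, not the actual file contents):

In [ ]:
# Illustrative structure only:
# error type ('fn'/'fp') -> database -> event -> 'code-categories' -> code -> category
error_analyses_example = {
    'fn': {
        'CPRD': {
            'bc': {
                'code-categories': {'BBa..': 'not-in-dnf'},
            },
        },
    },
}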


In [37]:
variation = '3-RN-CHD-RB-PAR.expand'

with open("../{}.{}.error-analyses.yaml".format(PROJECT, variation)) as f:
    error_analyses = yaml.safe_load(f)
    
def get_category(fn_or_fp, database, event, code):
    if database in error_analyses[fn_or_fp] and event in error_analyses[fn_or_fp][database]:
        return error_analyses[fn_or_fp][database][event]['code-categories'].get(code) or '?'
    else:
        return '??'

evs = ev[(ev.variation == variation) & ev.fn.notnull()][['event', 'database', 'fn', 'fp']]

fn = evs.apply(lambda row: pd.Series(row.fn), axis=1).stack().reset_index(level=1, drop=True)
fn.name = 'code'
# fns : | event | database | code |
fns = evs.drop(['fn', 'fp'], axis=1).join(fn, how='inner').drop_duplicates()
fns['category'] = fns.apply(lambda r: get_category('fn', r.database, r.event, r.code), axis=1)

fp = evs.apply(lambda row: pd.Series(row.fp), axis=1).stack().reset_index(level=1, drop=True)
fp.name = 'code'
# fps : | event | database | code |
fps = evs.drop(['fn', 'fp'], axis=1).join(fp, how='inner').drop_duplicates()
fps['category'] = fps.apply(lambda r: get_category('fp', r.database, r.event, r.code), axis=1)

fns.groupby(['category', 'database']).code.aggregate(lambda s: set(s)).map(', '.join).to_frame()


Out[37]:
code
category database
database-specific CPRD 7L1Hz, B83z., 8532., 70041, 7L1H., 853.., BB5C., 70042, B8086, 7L1Hy, 3283., 8531., BB5B., BB5Cz...
Medicare 433.11, 433.21, 433.31, 433.81, 433.01
not-in-dnf CPRD BBa.., BBaz., G64z4, 7L1H7, 1O0.., 7L1H6, BBd9.

In [38]:
fps.groupby(['category', 'database']).code.aggregate(lambda s: set(s)).map(', '.join).to_frame()


Out[38]:
code
category database
in-dnf CPRD B7D30, Jyu9., B905z, B1z0., J6708, G63z., B905., Bz..., G572., B835., B55y., B80zz, G572z, BB14....
GePaRD I21.29, K85, D01.9, C97, C68, D48.9, D09.9, D07.4, D37.8, D44, C26.0, I63.9, I22, C80, I47.9, C6...
IPCI A79, U75
Medicare 194.8, 194.9, 199.1, 234.9, 189, 432, 195.8, 199, 239.7, 239, 236.90, 410.90, 236.7, 427.2, 194,...
other-fp CPRD R090., BB52., G57z., R0003, BB5.., B7D3., 1969., G57.., J67.., 32K3., J671., 32J.., G8y0., G5753...
GePaRD D30.3, C64.-C68.9, R40.20, D00.-D09, D37.-D48.9, R10.9, C73.-C75.9, C00.-D48.9, K90.-K93.9, D00....
IPCI K80
Medicare 223.3, 230.-234.99, 577.1, 577.9, 427, 239.-239.99, 459.0, 140.-239.99, 570.-579.99, 235.-238.99...

In [39]:
code_counts = pd.Series({
    database: len(set(mappings.all_codes(database)))
    for database in databases.databases()
})
code_counts.loc['All'] = code_counts.sum()
code_counts.index.name = 'database'

def category_label(category):
    return {
        # FN
        'not-in-dnf': 'Not in UMLS',
        'database-specific': 'DB specific',
        'next-expansion': 'expansion$_4$',
        'isolated': 'Isolated',
        # FP
        'in-dnf': 'Cosynonym',
        'other-fp': 'Indexing FP',
    }.get(category, category)

def counts(code_categories, FN_or_FP):
    "code_categories : | code | category |"
    # (database, category) | int
    s1 = code_categories.groupby('database').category.value_counts()
    # category | int
    s2 = code_categories.category.value_counts()
    s2.index = pd.MultiIndex.from_product([['Overall'], s2.index])
    res = pd.concat([s1, s2]).to_frame('count')
    res['ratio'] = res['count'] / s2.sum()
    res['%'] = res['ratio'].map('{:.1%}'.format)
    #res['% (mapping)'] = (res['count'] / code_counts).map('{:.1%}'.format)

    res = res.rename(columns={'count': '{} category'.format(FN_or_FP)}).reset_index()
    res['category'] = res['category'].map(category_label)
    res['database'] = res['database'].map(lambda db: db if db == 'Overall' else database_label(db))
    res['error-type'] = [FN_or_FP] * len(res)
    return res

fp_counts = counts(fps, 'FP')
fp_counts


Out[39]:
database category FP category ratio % error-type
0 READ-2 Cosynonym 43 0.183761 18.4% FP
1 READ-2 Indexing FP 17 0.072650 7.3% FP
2 ICD-10 Cosynonym 66 0.282051 28.2% FP
3 ICD-10 Indexing FP 30 0.128205 12.8% FP
4 ICPC-2 Cosynonym 3 0.012821 1.3% FP
5 ICPC-2 Indexing FP 1 0.004274 0.4% FP
6 ICD-9 Cosynonym 52 0.222222 22.2% FP
7 ICD-9 Indexing FP 22 0.094017 9.4% FP
8 Overall Cosynonym 164 0.700855 70.1% FP
9 Overall Indexing FP 70 0.299145 29.9% FP

In [40]:
fn_counts = counts(fns, 'FN')
fn_counts


Out[40]:
database category FN category ratio % error-type
0 READ-2 DB specific 19 0.542857 54.3% FN
1 READ-2 Not in UMLS 11 0.314286 31.4% FN
2 ICD-9 DB specific 5 0.142857 14.3% FN
3 Overall DB specific 24 0.685714 68.6% FN
4 Overall Not in UMLS 11 0.314286 31.4% FN

In [41]:
category_names = {
    'DB specific': 'No synonym in reference',
    'Indexing FP': 'No TP synonym',
    'Cosynonym': 'Sibling of TP code'
}
data = pd.concat([
    (fn_counts[fn_counts.database == 'Overall']
     .rename(columns={'FN category': 'Count'})
     .assign(Category=lambda df: df.category.map(category_names))),
    (fp_counts[fp_counts.database == 'Overall']
     .rename(columns={'FP category': 'Count'})
     .assign(Category=lambda df: df.category.map(category_names)))
])
print(df)
(sns.factorplot(kind='bar', data=data[data['error-type'] == 'FP'], x='category', y='ratio',
                legend=True, legend_out=True, size=4, ci=None))


                ICD-9    ICD-10    ICPC-2    READ-2   Average
Sensitivity  0.928571  1.000000  1.000000  0.883857  0.953107
PPV          0.462319  0.496985  0.761905  0.742634  0.615961
Out[41]:
<seaborn.axisgrid.FacetGrid at 0x7f755e2dd0f0>

In [42]:
(sns.factorplot(kind='bar', data=data[data['error-type'] == 'FN'], x='category', y='ratio',
                legend=True, legend_out=True, size=4, ci=None))


Out[42]:
<seaborn.axisgrid.FacetGrid at 0x7f755df35e80>

In [ ]:
measures = OrderedDict([
    ('recall', measure_label('recall')),
    ('recall_in_umls', '{} to reference in UMLS'.format(measure_label('recall'))),
    ('recall_without_exclusions', '{} over inclusion codes'.format(measure_label('recall'))),
    ('recall_without_exclusions_in_umls', '{} over inclusion codes in UMLS'.format(measure_label('recall'))),
    ('', ''),
    ('precision', measure_label('precision')),
    ('precision_over_dnf', '{} over maximum recall'.format(measure_label('precision'))),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == '3-RN-CHD-RB-PAR.expand'].groupby('database')[measure].mean()\
        if measure else\
        pd.Series([0] * len(ev.database.unique()), index=ev.database.unique())
    for measure in measures
], index=measures.values())
averages_compare.columns = averages_compare.columns.map(database_label)

p = sns.color_palette(graded_recall_palette(5)[:-1] + [(1,1,1)] + graded_precision_palette(3)[:-1])
with mystyle(p, savefig='expansion3-error-analysis-by-db.pdf'):
    averages_compare.T.plot(kind='bar', title="Error analysis of 3-step expansion")

In [ ]:
averages_compare = pd.DataFrame([
    ev[ev.variation == '3-RN-CHD-RB-PAR.expand'].groupby('event')[measure].mean()\
        if measure else\
        pd.Series([0] * len(ev.event.unique()), index=ev.event.unique())
    for measure in measures
], index=measures.values())
averages_compare.columns = averages_compare.columns.map(event_label)

p = sns.color_palette(graded_recall_palette(5)[:-1] + [(1,1,1)] + graded_precision_palette(3)[:-1])
with mystyle(p, savefig='expansion3-error-analysis-by-event.pdf', xrot=45, ha='right'):
    averages_compare.T.plot(kind='bar', title="Error analysis of 3-step expansion")

In [ ]:
residuals = ev[ev.variation == '3-RN-CHD-RB-PAR.expand'].copy()


residuals.fn_inclusions_in_umls = residuals.fn_inclusions_in_umls\
    .fillna('NaN').map(json.loads)  # json.loads('NaN') yields float nan, so missing rows stay NaN
def get_missed(row):
    if math.isnan(row.recall_without_exclusions_in_umls):
        return ''
    else:
        reference = set(json.loads(row.reference_inclusions_in_umls))
        return "{}/{}".format(len(row.fn_inclusions_in_umls), len(reference))

residuals['missed'] = residuals.apply(get_missed, axis=1)

residuals.fn_inclusions_in_umls = residuals.fn_inclusions_in_umls\
    .map(lambda s: ', '.join(s) if isinstance(s, list) else 'N/A')
residuals.database = residuals.database.map(database_label)
residuals.event = residuals.event.map(event_label)
residuals.recall_without_exclusions_in_umls = residuals.recall_without_exclusions_in_umls\
    .map('{:.2f}'.format)
residuals = residuals.sort_values(by=['database', 'event']).reset_index(drop=True)

residuals = residuals[['database', 'event', 'recall_without_exclusions_in_umls', 'missed', 'fn_inclusions_in_umls']]
residuals.columns = ["Database", "Event", "Recall", "Missed", "Residual FNs"]

#residuals = residuals.set_index(['Database', 'Event'])["Residual FNs"].unstack()
residuals

Removing unused codes

In [ ]:
compare_variations = OrderedDict([
    ('baseline', 'Baseline'),
    ('baseline.filter-gen', 'Filtered'),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == variation].groupby('database').precision.mean()
    for variation in compare_variations
], index = compare_variations.values())
averages_compare.columns = averages_compare.columns.map(database_label)

with mystyle(graded_precision_palette(len(compare_variations)), savefig='filtered-baseline-precision-by-db.pdf'):
    averages_compare.T.plot(kind='bar', title="Filtered baseline")
    plt.ylabel(measure_label('precision'))

In [ ]:
compare_variations = OrderedDict([
    ('baseline', 'Baseline'),
    ('baseline.filter-gen', 'Filtered'),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == variation].groupby('event').precision.mean()
    for variation in compare_variations
], index = compare_variations.values())
averages_compare.columns = averages_compare.columns.map(event_label)

with mystyle(graded_precision_palette(len(compare_variations)), xrot=45, ha='right', savefig='filtered-baseline-precision-by-event.pdf'):
    averages_compare.T.plot(kind='bar', title="Filtered baseline")
    plt.ylabel(measure_label('precision'))

In [ ]:
compare_variations = OrderedDict([
    ('3-RN-CHD-RB-PAR.expand', 'Expand 3 steps'),
    ('3-RN-CHD-RB-PAR.expand.filter-gen', 'Expand 3 steps, filter'),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == variation].groupby('database').precision.mean()
    for variation in compare_variations
], index = compare_variations.values())
averages_compare.columns = averages_compare.columns.map(database_label)

with mystyle(graded_precision_palette(len(compare_variations)), savefig='filtered-expansion3-precision-by-db.pdf'):
    averages_compare.T.plot(kind='bar', title="Filtered expansion")
    plt.ylabel(measure_label('precision'))

In [ ]:
compare_variations = OrderedDict([
    ('3-RN-CHD-RB-PAR.expand', 'Expand 3 steps'),
    ('3-RN-CHD-RB-PAR.expand.filter-gen', 'Expand 3 steps, filter'),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == variation].groupby('event').recall.mean()
    for variation in compare_variations
], index = compare_variations.values())
averages_compare.columns = averages_compare.columns.map(event_names.get)

with mystyle(graded_recall_palette(len(compare_variations)), xrot=45, ha='right', savefig='filtered-expansion3-recall-by-event.pdf'):
    averages_compare.T.plot(kind='bar', title="Filtered 3-step expansion")
    plt.ylabel(measure_label('recall'))

In [ ]:
compare_variations = OrderedDict([
    ('3-RN-CHD-RB-PAR.expand', 'Expand 3 steps'),
    ('3-RN-CHD-RB-PAR.expand.filter-gen', 'Expand 3 steps, filter'),
])
averages_compare = pd.DataFrame([
    ev[ev.variation == variation].groupby('event').precision.mean()
    for variation in compare_variations
], index = compare_variations.values())
averages_compare.columns = averages_compare.columns.map(event_names.get)

with mystyle(graded_precision_palette(len(compare_variations)), xrot=45, ha='right', savefig='filtered-expansion3-precision-by-event.pdf'):
    averages_compare.T.plot(kind='bar', title="Filtered 3-step expansion")
    plt.ylabel(measure_label('precision'))

In [ ]:
measures = ['recall', 'precision']
averages_compare = pd.DataFrame([
    ev[ev.variation == '3-RN-CHD-RB-PAR.expand.filter-gen'].groupby('database')[measure].mean()
    for measure in measures
], index=map(measure_label, measures))
averages_compare.columns = averages_compare.columns.map(database_label)
#averages_compare.index = compare_variations.values()

with mystyle(measures_palette, savefig='filtered-expansion3-performance-by-db.pdf'):
    averages_compare.T.plot(kind='bar', title="Performance of filtered 3-step expansion")

The drop in PPV for Myocardial infarction is caused by the mapping to codes 410.* (Acute myocardial infarction) in Medicare, which are not used in the ARS database.

Codes in reference mappings, not in databases

Codes that might be removed from the TP when filtering.

In [ ]:
stats = pd.DataFrame()
stats['In ref'] = code_stats[code_stats.InMapping]\
    .groupby('Database').Code.count()
stats['Not in DB'] = code_stats[code_stats.InMapping & ~code_stats.InDatabase]\
    .groupby('Database').Code.count()
stats.fillna(0, inplace=True)
stats['%'] = (stats['Not in DB'] / stats['In ref']).map("{:.2%}".format)
stats['Codes'] = code_stats[code_stats.InMapping & ~code_stats.InDatabase]\
    .groupby('Database').Code.aggregate(lambda vs: ', '.join(set(vs)))
stats
